import lmdeploy
from lmdeploy import PytorchEngineConfig, TurbomindEngineConfig

# Force float16 precision for the PyTorch engine (explicitly avoids the
# default bfloat16 path, which some GPUs/deployments do not support).
# NOTE(review): the original script also built a
# TurbomindEngineConfig(dtype='float16') here, but it was never passed to
# the pipeline — only the PytorchEngineConfig below is used. The dead
# config has been removed; switch the config class if the Turbomind
# backend is actually intended.
engine_config = PytorchEngineConfig(
    dtype='float16'     # run model weights/activations in float16
)

# Build the inference pipeline with the float16 PyTorch backend config.
model_dir = "/data/models/llm/models/internlm3-8b-instruct"
pipe = lmdeploy.pipeline(model_dir, backend_config=engine_config)

# Run a sample prompt and print the generated response.
response = pipe("Please tell me five scenic spots in Shanghai")
print(response)
